Data source: www.covidtracking.com

library(tidyverse)
library(themebg)

# Daily per-state figures downloaded from the COVID Tracking Project API.
# `date` is coerced to text and parsed as YYYYMMDD; rows are ordered by date
# and state, and `str_date` holds a character copy of the parsed date.
raw_state <- "https://covidtracking.com/api/states/daily.csv" %>%
    read_csv() %>%
    mutate(date = parse_date(as.character(date), format = "%Y%m%d")) %>%
    arrange(date, state) %>%
    mutate(str_date = as.character(date))

# State metadata from the same API; only the `state` and `name` columns are
# kept for joining onto the daily state data.
raw_state_info <- read_csv("https://covidtracking.com/api/states/info.csv")

df_state_info <- select(raw_state_info, state, name)

# Daily national figures downloaded from the COVID Tracking Project API.
# `date` is coerced to text and parsed as YYYYMMDD; `str_date` holds a
# character copy of the parsed date.
raw_usa <- "https://covidtracking.com/api/us/daily.csv" %>%
    read_csv() %>%
    mutate(
        date = parse_date(as.character(date), format = "%Y%m%d"),
        str_date = as.character(date)
    )

# Lockdown dates from a local CSV. The file's own header row is skipped and
# replaced with the column names given here; `lockdown_date` is parsed as
# day/month/year.
raw_lockdown <- read_csv(
    file = "../data/raw/kaggle_jcyzag/countryLockdowndates.csv",
    col_names = c("country", "name", "lockdown_date", "lockdown", "reference"),
    skip = 1
) %>%
    mutate(lockdown_date = parse_date(lockdown_date, format = "%d/%m/%Y"))

# Columns needed to attach lockdown information by `name`.
df_state_lockdown <- select(raw_lockdown, name, lockdown_date, lockdown)

# National data with derived ratios and day-over-day changes.
# Rows are sorted by date first so that lag() refers to the previous day's row.
df_usa <- raw_usa %>%
    arrange(date) %>%
    # Ratios between the cumulative columns.
    mutate(
        death_positive = death / positive,
        death_hospital = death / hospitalized,
        death_icu = death / inIcuCumulative,
        death_vent = death / onVentilatorCumulative,
        hospital_positive = hospitalized / positive
    ) %>%
    # Change in the daily increase relative to the previous row.
    mutate(
        pos_inc_chg = positiveIncrease - lag(positiveIncrease),
        death_inc_chg = deathIncrease - lag(deathIncrease)
    ) %>%
    # The same change expressed as a percentage of the current day's increase.
    mutate(
        pos_inc_chg_pct = 100 * (pos_inc_chg / positiveIncrease),
        death_inc_chg_pct = 100 * (death_inc_chg / deathIncrease)
    )

# Per-state data with derived ratios, state names and lockdown information.
#
# The ratios are plain element-wise divisions, so no grouping is needed:
# grouping by (state, date) would only create one group per row and slow the
# mutate down without changing any value.
#
# NOTE(review): the lockdown table is joined on `name` only. If the lockdown
# file contains the same `name` under more than one `country`, this join will
# duplicate rows -- confirm against the data.
df_state <- raw_state %>%
    mutate(
        death_positive = death / positive,
        death_hospital = death / hospitalized,
        death_icu = death / inIcuCumulative,
        death_vent = death / onVentilatorCumulative,
        hospital_positive = hospitalized / positive
    ) %>%
    left_join(df_state_info, by = "state") %>%
    left_join(df_state_lockdown, by = "name") %>%
    mutate_at("lockdown", as_factor)

# Daily totals split into New York (is_ny = TRUE) and all other states.
# Missing values are ignored when summing. The *_change columns are the
# difference from the previous row within each is_ny group. The result is
# left grouped by is_ny.
df_us_ny <- raw_state %>%
    group_by(date, is_ny = state == "NY") %>%
    summarize_at(vars(positive, negative, death), sum, na.rm = TRUE) %>%
    mutate(tests = positive + negative) %>%
    group_by(is_ny) %>%
    mutate(
        positive_change = positive - lag(positive),
        negative_change = negative - lag(negative),
        death_change = death - lag(death),
        tests_change = tests - lag(tests),
        death_positive = death / positive
    )

USA

library(plotly)
# raw_usa %>%
#     ggplot(aes(x = date, y = positive)) +
#     geom_line() +
#     theme_bg() +
#     theme(legend.position = "none")

# Line chart of the `positive` column over time for the USA.
plot_ly(df_usa, x = ~str_date, y = ~positive) %>%
    add_lines() %>%
    layout(showlegend = FALSE)

Positive cases in USA

# Bar chart of `positiveIncrease` per day for the USA
# (add_lines() is the line-chart alternative to add_bars()).
plot_ly(df_usa, x = ~str_date, y = ~positiveIncrease) %>%
    add_bars() %>%
    layout(showlegend = FALSE)

Number of new positive cases each day in USA

# Line chart of the day-over-day change in new positive cases
# (add_bars() is the bar-chart alternative to add_lines()).
plot_ly(df_usa, x = ~str_date, y = ~pos_inc_chg) %>%
    add_lines() %>%
    layout(showlegend = FALSE)

Rate of change of new positive cases each day

# Daily percent change in new positive cases with a linear trend line.
#
# The model and the chart are both built from one filtered data set, so
# fitted(l) always has exactly one value per plotted row. Only finite
# percentages are kept: NA, NaN and +/-Inf are dropped (NaN and Inf appear
# when positiveIncrease is 0), because lm() stops with an error when the
# response contains infinite values.
df_pos_pct <- df_usa %>%
    filter(is.finite(pos_inc_chg_pct), !is.na(date))

l <- lm(pos_inc_chg_pct ~ date, df_pos_pct)

df_pos_pct %>%
    plot_ly(x = ~str_date) %>%
    add_lines(y = ~pos_inc_chg_pct) %>%
    add_lines(y = fitted(l)) %>%
    layout(showlegend = FALSE)

Rate of change of new positive cases each day as percent of new positive cases

# Line chart of the `death` column over time for the USA.
plot_ly(df_usa, x = ~str_date, y = ~death) %>%
    add_lines() %>%
    layout(showlegend = FALSE)

Cumulative deaths in USA

# Bar chart of `deathIncrease` per day for the USA.
plot_ly(df_usa, x = ~str_date, y = ~deathIncrease) %>%
    add_bars() %>%
    layout(showlegend = FALSE)

Number of new deaths each day in USA

# Line chart of the day-over-day change in new deaths
# (add_bars() is the bar-chart alternative to add_lines()).
plot_ly(df_usa, x = ~str_date, y = ~death_inc_chg) %>%
    add_lines() %>%
    layout(showlegend = FALSE)

Rate of change of new deaths each day

# Daily percent change in new deaths with a linear trend line.
#
# The model and the chart are both built from one filtered data set, so
# fitted(l) always has exactly one value per plotted row. Only finite
# percentages are kept: NA, NaN and +/-Inf are dropped (NaN and Inf appear
# when deathIncrease is 0), because lm() stops with an error when the
# response contains infinite values.
df_death_pct <- df_usa %>%
    filter(is.finite(death_inc_chg_pct), !is.na(date))

l <- lm(death_inc_chg_pct ~ date, df_death_pct)

df_death_pct %>%
    plot_ly(x = ~str_date) %>%
    add_lines(y = ~death_inc_chg_pct) %>%
    add_lines(y = fitted(l)) %>%
    layout(showlegend = FALSE)

Rate of change of new deaths each day as percent of new deaths

# Line chart of deaths divided by positive cases for the USA.
plot_ly(df_usa, x = ~str_date, y = ~death_positive) %>%
    add_lines() %>%
    layout(showlegend = FALSE)

Deaths per positive case

# Line chart of the `hospitalized` column over time for the USA.
plot_ly(df_usa, x = ~str_date, y = ~hospitalized) %>%
    add_lines() %>%
    layout(showlegend = FALSE)

Cumulative hospitalizations

# Bar chart of `hospitalizedIncrease` per day for the USA.
plot_ly(df_usa, x = ~str_date, y = ~hospitalizedIncrease) %>%
    add_bars() %>%
    layout(showlegend = FALSE)

Number of new hospitalizations each day

# Line chart of hospitalizations divided by positive cases for the USA.
plot_ly(df_usa, x = ~str_date, y = ~hospital_positive) %>%
    add_lines() %>%
    layout(showlegend = FALSE)

Hospitalizations per positive case

# Line chart of deaths divided by hospitalizations for the USA.
plot_ly(df_usa, x = ~str_date, y = ~death_hospital) %>%
    add_lines() %>%
    layout(showlegend = FALSE)

Deaths per hospitalized case

State

# Line chart of the `positive` column over time, one colored line per state.
plot_ly(
    df_state,
    x = ~str_date,
    y = ~positive,
    color = ~state,
    colors = "Paired"
) %>%
    add_lines() %>%
    layout(showlegend = FALSE)

Positive cases by state

# Static line chart of positive cases by state with New York excluded.
ggplot(
    filter(df_state, state != "NY"),
    aes(x = date, y = positive, color = state)
) +
    geom_line() +
    theme_bg() +
    theme(legend.position = "none")

# Bar chart of `positiveIncrease` per day, colored by state.
plot_ly(
    df_state,
    x = ~str_date,
    y = ~positiveIncrease,
    color = ~state,
    colors = "Paired"
) %>%
    add_bars() %>%
    layout(showlegend = FALSE)

Number of new positive cases each day by state

# Line chart of the `death` column over time, one colored line per state.
plot_ly(
    df_state,
    x = ~str_date,
    y = ~death,
    color = ~state,
    colors = "Paired"
) %>%
    add_lines() %>%
    layout(showlegend = FALSE)

Number of deaths by state

# Bar chart of `deathIncrease` per day, colored by state.
plot_ly(
    df_state,
    x = ~str_date,
    y = ~deathIncrease,
    color = ~state,
    colors = "Paired"
) %>%
    add_bars() %>%
    layout(showlegend = FALSE)

Number of new deaths each day by state

# Line chart of deaths divided by positive cases, one colored line per state.
plot_ly(
    df_state,
    x = ~str_date,
    y = ~death_positive,
    color = ~state,
    colors = "Paired"
) %>%
    add_lines() %>%
    layout(showlegend = FALSE)

Deaths per positive cases by state

# Line chart of the `hospitalized` column, one colored line per state.
plot_ly(
    df_state,
    x = ~str_date,
    y = ~hospitalized,
    color = ~state,
    colors = "Paired"
) %>%
    add_lines() %>%
    layout(showlegend = FALSE)

Number of hospitalizations by state

# Bar chart of `hospitalizedIncrease` per day, colored by state.
plot_ly(
    df_state,
    x = ~str_date,
    y = ~hospitalizedIncrease,
    color = ~state,
    colors = "Paired"
) %>%
    add_bars() %>%
    layout(showlegend = FALSE)

Number of new hospitalizations each day by state

# Line chart of hospitalizations divided by positive cases, colored by state.
plot_ly(
    df_state,
    x = ~str_date,
    y = ~hospital_positive,
    color = ~state,
    colors = "Paired"
) %>%
    add_lines() %>%
    layout(showlegend = FALSE)

Hospitalizations per positive case by state

# Line chart of deaths divided by hospitalizations, colored by state.
plot_ly(
    df_state,
    x = ~str_date,
    y = ~death_hospital,
    color = ~state,
    colors = "Paired"
) %>%
    add_lines() %>%
    layout(showlegend = FALSE)

Deaths per hospitalized case by state

# One panel per state (independent y scales): daily new positive cases as
# columns, with a vertical line at the state's lockdown date whose line type
# encodes the `lockdown` factor. The legend is hidden.
ggplot(df_state, aes(x = date, y = positiveIncrease)) +
    geom_col() +
    geom_vline(aes(xintercept = lockdown_date, linetype = lockdown)) +
    facet_wrap(~ state, scales = "free_y") +
    theme_bg() +
    theme(legend.position = "none")
Increase in daily cases after lockdown by state

Increase in daily cases after lockdown by state

NY vs. Rest of USA

# Positive cases over time, NY vs. all other states (colored by is_ny).
ggplot(df_us_ny, aes(x = date, y = positive, color = is_ny)) +
    geom_line() +
    theme_bg()
Number of positive cases in NY vs. rest of USA

Number of positive cases in NY vs. rest of USA

# Negative tests over time, NY vs. all other states (colored by is_ny).
ggplot(df_us_ny, aes(x = date, y = negative, color = is_ny)) +
    geom_line() +
    theme_bg()
Number of negative tests in NY vs. rest of USA

Number of negative tests in NY vs. rest of USA

# Total tests (positive + negative) over time, NY vs. all other states.
ggplot(df_us_ny, aes(x = date, y = tests, color = is_ny)) +
    geom_line() +
    theme_bg()
Number of tests performed in NY vs. rest of USA

Number of tests performed in NY vs. rest of USA

# Deaths over time, NY vs. all other states (colored by is_ny).
ggplot(df_us_ny, aes(x = date, y = death, color = is_ny)) +
    geom_line() +
    theme_bg()
Number of deaths in NY vs. rest of USA

Number of deaths in NY vs. rest of USA

# Daily change in positive cases, NY vs. all other states: points for each
# day with a smoothed trend drawn underneath.
ggplot(df_us_ny, aes(x = date, y = positive_change, color = is_ny)) +
    geom_smooth() +
    geom_point() +
    theme_bg()
Number of new positive cases each day in NY vs. rest of USA

Number of new positive cases each day in NY vs. rest of USA

# Daily change in deaths, NY vs. all other states: points for each day with
# a smoothed trend drawn underneath.
ggplot(df_us_ny, aes(x = date, y = death_change, color = is_ny)) +
    geom_smooth() +
    geom_point() +
    theme_bg()
Number of new deaths each day in NY vs. rest of USA

Number of new deaths each day in NY vs. rest of USA

# Deaths divided by positive cases, NY vs. all other states. Only rows with
# a ratio above zero are drawn.
ggplot(
    filter(df_us_ny, death_positive > 0),
    aes(x = date, y = death_positive, color = is_ny)
) +
    geom_line() +
    theme_bg()
Number of deaths per positive cases in NY vs. rest of USA

Number of deaths per positive cases in NY vs. rest of USA